import pandas as pd
import numpy as np
import os
import datetime
import matplotlib
import matplotlib.pyplot as plt
from mpl_toolkits.axes_grid1 import make_axes_locatable
import pytz
import itertools
import visualize
import utils
import pvlib
import cs_detection
import visualize_plotly as visualize
# import visualize
# from bokeh.plotting import output_notebook
# output_notebook()
%load_ext autoreload
%autoreload 2
np.set_printoptions(precision=4)
%matplotlib notebook
Read pickled data from the setup notebook.
# Load the NSRDB and ground datasets produced by the setup notebook
# (gzip-compressed pickles of ClearskyDetection objects).
nsrdb = cs_detection.ClearskyDetection.read_pickle('srrl_nsrdb.pkl.gz')
ground = cs_detection.ClearskyDetection.read_pickle('srrl_ground.pkl.gz')
# Restrict each set to the period of interest; date strings look like
# 'MM-DD-YYYY' -- TODO confirm the format trim_dates expects.
nsrdb.trim_dates('01-01-2010', '01-01-2011')
ground.trim_dates('10-01-2010', '01-01-2011')
# Overlay measured NSRDB GHI with the three clearsky models, and mark the
# timestamps NSRDB itself flags as clear (sky_status == 1).
vis = visualize.Visualizer()
for column in ('GHI', 'Clearsky GHI', 'Clearsky GHI pvlib', 'Clearsky GHI stat'):
    vis.add_line_ser(nsrdb.df[column], column)
nsrdb_clear = nsrdb.df['sky_status'] == 1
vis.add_circle_ser(nsrdb.df[nsrdb_clear]['GHI'], 'NSRDB clear')
vis.show()
PVLib is systematically higher than the NSRDB and statistical clearsky models. It will also provide consistent behavior between different data sets, so that probably shouldn't be a large concern. All of the modeled GHIs look about the same despite the PVLib peaks being high. In general, I would agree with the NSRDB clearness metric. It looks like it misses some obvious points (based on GHI alone). It also picks points that are in 'noisy' periods that probably shouldn't be picked. Clearsky GHI stat has some issues where the curve is not smooth. This might have to be smoothed/interpolated in a different way.
# Per-point mean absolute difference between clearsky models on NSRDB data
# (bare expressions so the notebook displays the values).
utils.mean_abs_diff(nsrdb.df['Clearsky GHI pvlib'], nsrdb.df['Clearsky GHI'])
utils.mean_abs_diff(nsrdb.df['Clearsky GHI pvlib'], nsrdb.df['Clearsky GHI stat'])
# Smooth the statistical clearsky with a 3-sample rolling window; appears to
# add a 'Clearsky GHI stat smooth' column (used below) -- TODO confirm in cs_detection.
nsrdb.robust_rolling_smooth('Clearsky GHI stat', 3)
# Re-plot the NSRDB series with the newly smoothed statistical clearsky added.
vis = visualize.Visualizer()
columns = (
    'GHI',
    'Clearsky GHI',
    'Clearsky GHI pvlib',
    'Clearsky GHI stat',
    'Clearsky GHI stat smooth',
)
for column in columns:
    vis.add_line_ser(nsrdb.df[column], column)
vis.show()
# Overlay measured ground GHI with the pvlib and statistical clearsky models,
# marking points pvlib's detection flags as clear.
vis = visualize.Visualizer()
for column in ('GHI', 'Clearsky GHI pvlib', 'Clearsky GHI stat'):
    vis.add_line_ser(ground.df[column], column)
pvlib_clear = ground.df['sky_status pvlib'] == 1
vis.add_circle_ser(ground.df[pvlib_clear]['GHI'], 'pvlib clear')
vis.show()
The statistical clearsky trend looks ok, but it's terribly noisy. We will try some smoothing techniques to provide a more reliable solution.
# Mean absolute difference between pvlib and statistical clearsky on ground data.
utils.mean_abs_diff(ground.df['Clearsky GHI pvlib'], ground.df['Clearsky GHI stat'])
Mean absolute difference is quite good. The noise is relatively small so this should be expected. The main worry with the noise is that window-based metrics for determining sky clarity might be affected.
# Smooth the ground statistical clearsky with a 60-sample rolling window
# (~1 hour if the ground data is 1-min -- TODO confirm frequency).
ground.robust_rolling_smooth('Clearsky GHI stat', 60)
# Re-plot the ground series with the smoothed statistical clearsky included.
vis = visualize.Visualizer()
columns = (
    'GHI',
    'Clearsky GHI pvlib',
    'Clearsky GHI stat',
    'Clearsky GHI stat smooth',
)
for column in columns:
    vis.add_line_ser(ground.df[column], column)
pvlib_clear = ground.df['sky_status pvlib'] == 1
vis.add_circle_ser(ground.df[pvlib_clear]['GHI'], 'pvlib clear')
vis.show()
# Recompute the pvlib-vs-statistical difference using the smoothed series.
utils.mean_abs_diff(ground.df['Clearsky GHI pvlib'], ground.df['Clearsky GHI stat smooth'])
Smoothing did not make the statistical curve fit the pvlib curve dramatically better. In fact, the improvement is almost negligible. The smoothness is what we desired, though, and in that respect the smoothed curve much more closely resembles the pvlib curve.
# Restrict both frames to their shared timestamps so the series below
# compare index-to-index.
ground.intersection(nsrdb.df.index)
nsrdb.intersection(ground.df.index)
# Compare ground and NSRDB measurements and their clearsky curves on the
# common index.
vis = visualize.Visualizer()
overlays = [
    (ground.df['GHI'], 'Ground GHI'),
    (nsrdb.df['GHI'], 'NSRDB GHI'),
    # PVLib clearsky will be the same for both (used same location).
    (ground.df['Clearsky GHI pvlib'], 'PVLib GHI_cs'),
    (nsrdb.df['Clearsky GHI'], 'NSRDB GHI_cs'),
    (ground.df['Clearsky GHI stat smooth'], 'Ground GHI_cs smooth'),
    (nsrdb.df['Clearsky GHI stat smooth'], 'NSRDB GHI_cs smooth'),
]
for series, label in overlays:
    vis.add_line_ser(series, label)
vis.show()
All measurements seem to match well. Smoothing 1min and 30min data agrees well here. Care should be taken when selecting window sizes in future for different data frequencies.
# Pairwise agreement between ground and NSRDB series on the common index.
utils.mean_abs_diff(ground.df['GHI'], nsrdb.df['GHI'])
utils.mean_abs_diff(ground.df['Clearsky GHI pvlib'], nsrdb.df['Clearsky GHI'])
utils.mean_abs_diff(ground.df['Clearsky GHI stat smooth'], nsrdb.df['Clearsky GHI stat smooth'])
Add the smoothed statistical clearsky to the original data frames and dump to file.
# Reload the full (untrimmed) frames written by the setup notebook, apply the
# smoothing windows chosen above, and persist for downstream notebooks.
# Fix: earlier cells read 'srrl_nsrdb.pkl.gz'/'srrl_ground.pkl.gz'; the
# original here read 'srrl_nsrdb.pkl'/'srrl_ground.pkl', which targets
# different (possibly nonexistent) files. Use the same paths consistently.
nsrdb = cs_detection.ClearskyDetection.read_pickle('srrl_nsrdb.pkl.gz')
ground = cs_detection.ClearskyDetection.read_pickle('srrl_ground.pkl.gz')
nsrdb.robust_rolling_smooth('Clearsky GHI stat', 3)    # 30-min NSRDB data -> 3-sample window
ground.robust_rolling_smooth('Clearsky GHI stat', 60)  # 1-min ground data -> 60-sample window
nsrdb.to_pickle('srrl_nsrdb_1.pkl', overwrite=True)
ground.to_pickle('srrl_ground_1.pkl', overwrite=True)